


import scrapy


class MySpider(scrapy.Spider):
    """Spider that requests the chenjinwei.com home page and prints its body.

    The start URL response is handled by ``parse``, which issues another
    request for the same URL and routes that response to
    ``parse_category``.
    """

    # Unique name used to tell this spider apart from others.
    name = "test2"
    # Domains the spider is allowed to visit.
    allowed_domains = ['chenjinwei.com']
    start_urls = ['http://chenjinwei.com/']

    def __init__(self, *args, **kwargs):
        """Initialise the base spider and build the header dictionary.

        Args:
            *args: Positional arguments forwarded to ``scrapy.Spider``.
            **kwargs: Keyword arguments (e.g. ``-a key=value`` spider
                arguments) forwarded to ``scrapy.Spider``.
        """
        # Forward everything to the base class so its own initialisation
        # runs and spider arguments do not raise TypeError.
        super().__init__(*args, **kwargs)
        # NOTE(review): this dictionary is only stored on the instance;
        # nothing in this class attaches it to a request. Pass
        # ``headers=self.headers`` to ``scrapy.Request`` if these values
        # are meant to be sent -- confirm intent before wiring it in.
        self.headers = {
            'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept-Encoding':'gzip, deflate',
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
        }

    def parse(self,response):
        """Yield a request for the home page handled by ``parse_category``.

        Args:
            response: Response object passed in by Scrapy; it is not
                inspected here.

        Yields:
            scrapy.Request: Request for ``http://chenjinwei.com/`` with
            ``parse_category`` as its callback.
        """
        # Address to crawl.
        yield scrapy.Request(url='http://chenjinwei.com/',callback=self.parse_category)

    # Crawl callback.
    def parse_category(self, response):
        """Print the raw body of the response.

        Args:
            response: Response object whose ``body`` attribute is printed.
        """
        print(response.body)
        # Disabled extraction code, kept verbatim. The two dictionary keys
        # below mean "category name" and "path" respectively.
        #for url in response.xpath("//ul[@class='fly-list']/li/h2/a"):
        #    yield {
        #        "类别名" : url.xpath("./text()").extract_first(),
        #        "路径" : url.xpath("./@href").extract_first(),
        #    }
            #cat_url = response.urljoin(url)
            #yield scrapy.Request(url=cat_url,callback=self.parse_app_list)

    # Disabled callback for application list pages, kept verbatim.
    #def parse_app_list(self,response):
    #    for app_url in response.xpath("//ul[@class='app-list clearfix']/li/div/div/a[@class='name ofh']"):
    #        yield {
    #            'app_name':app_url.xpath('./text()').extract_first(),
    #            'app_url':app_url.xpath('./@href').extract_first(),
    #            }